###################
# Appendix B - Conspiracy Engagement by Account
###################

### We cannot provide the data to reproduce these figures due to Twitter's privacy restrictions, as the code relies on each unique user ID.
# We do, however, provide the code used to create the figures. 

# Remove every (non-hidden) object from the current workspace so the script
# starts from a clean state.
rm(list = ls())
# Placeholder: pass the path of the project directory as the argument.
# As written (no argument) this call stops with a "missing argument" error.
setwd() #working directory here
# Packages
pacman::p_load(rtweet,
               ggplot2,
               dplyr,
               readxl,
               writexl,
               broom,
               tidyverse,
               dotwhisker,
               lubridate,
               stargazer,
               rpart,
               caret,
               ipred,
               RColorBrewer,
               rpart.plot,
               MASS,
               randomForest,
               dplyr,
               ggpubr,
               car,
               jtools,
               forcats,
               interactions,
               effects
               
)

# Fix the random-number seed; the literal 0611 is parsed as the number 611.
set.seed(0611)
# Import data
# Placeholder: pass the path of the CSV file as the first argument; with no
# argument read.csv() stops with a "missing argument" error.
# The code below reads the columns user_id, conspir.gen, followers_count and
# mass.pis from df.
df <- read.csv() # your data here

############

# 
# Summary Stats: number of rows (tweets) per account
# One row per user_id, with N = how many rows of df belong to that account.
counts <- summarise(group_by(df, user_id), N = n())

# Mean, median and standard deviation of the per-account counts.
summarise(
  counts,
  Mean = mean(N),
  Median = median(N),
  SD = sd(N)
)

# Min / quartiles / mean / max of the same per-account counts.
summary(counts$N)

# Plot: one bar per account, height = number of rows in df for that account,
# accounts ordered from most to least frequent. Account labels and tick marks
# are blanked out so individual user IDs are not shown.
# NOTE(review): fct_infreq() expects a factor or character vector; user_id is
# only converted to a factor further down the script -- confirm it is read in
# as character here.
df %>%
  ggplot(mapping = aes(x = fct_infreq(user_id))) +
  geom_bar() +
  xlab("Accounts") +
  ggtitle("Frequency of Tweets by Account") +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_blank()
  )


### Now, re: conspiracy

# Summary Stats: number of conspiracy-flagged rows (conspir.gen == 1) per
# account. Accounts with no such rows do not appear in counts, so the
# statistics below describe only accounts with at least one flagged tweet.
counts <- df %>%
  filter(conspir.gen == 1) %>%
  group_by(user_id) %>%
  summarise(N = n())

# Mean, median and standard deviation of the per-account counts.
summarise(
  counts,
  Mean = mean(N),
  Median = median(N),
  SD = sd(N)
)

# Min / quartiles / mean / max of the same per-account counts.
summary(counts$N)


# Plot: one bar per account, height = number of that account's tweets flagged
# conspir.gen == 1, accounts ordered from most to least frequent.
# The data are restricted to conspir.gen == 1 before plotting so the figure
# matches its title and the summary statistics computed just above; plotting
# the unfiltered df would simply redraw the all-tweets frequency figure.
# Account labels and tick marks are blanked out so user IDs are not shown.
df %>%
  filter(conspir.gen == 1) %>%
  ggplot(aes(fct_infreq(user_id))) +
  geom_bar() +
  ggtitle("Frequency of Tweets Engaging with Conspiracy Theory by Account") +
  xlab("Accounts") +
  theme(axis.text.x = element_blank(), axis.ticks = element_blank())

####################


# Treat account IDs as categorical labels (one axis position per account).
df$user_id <- as.factor(df$user_id)

# Labels
# Facet strip text, looked up by the value of mass.pis (0 or 1).
labs <- c("Non-PiS Officials", "PiS Officials")
names(labs) <- c(0, 1)

# Followers per account, one panel per mass.pis group.
# df has several rows per account (the sections above count them as tweets).
# Drawing those rows directly with geom_bar(stat = "identity") stacks one
# segment per row, so each bar would be the SUM of log(followers_count) over
# an account's tweets rather than its log follower count. The data are
# therefore collapsed to one row per account first. If followers_count differs
# between an account's tweets, the bar shows the mean of the logged values,
# which is also the statistic reorder() uses by default for the ordering.
# Rows with missing mass.pis are dropped so no NA panel is drawn.
df %>%
  filter(!is.na(mass.pis)) %>%
  group_by(user_id, mass.pis) %>%
  summarise(
    log_followers = mean(log(followers_count), na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = reorder(user_id, -log_followers), y = log_followers)) +
  geom_col() +
  facet_wrap(~mass.pis, ncol = 1,
             labeller = labeller(mass.pis = labs)) +
  ggtitle("N Followers by PiS and Non-PiS") +
  xlab("Account") +
  ylab("Log(N Followers)") +
  # Hide account labels/ticks (user IDs are private) and centre the title.
  theme(axis.text.x = element_blank(), axis.ticks = element_blank(),
        plot.title = element_text(hjust = 0.5))


# Summary Stats: mean, median and standard deviation of followers_count,
# reported separately for mass.pis == 0 and mass.pis == 1.
# filter() keeps only rows where the comparison is TRUE, so rows with a
# missing mass.pis are excluded from both tables without a separate NA check.
# NOTE(review): these statistics are computed over every row of df, so an
# account contributes once per row it has -- confirm whether per-account
# (one row per user_id) figures were intended.

# mass.pis == 0
summarise(
  filter(df, mass.pis == 0),
  Mean = mean(followers_count),
  Median = median(followers_count),
  SD = sd(followers_count)
)

# mass.pis == 1
summarise(
  filter(df, mass.pis == 1),
  Mean = mean(followers_count),
  Median = median(followers_count),
  SD = sd(followers_count)
)